Clusterização dos estabelecimentos por semelhança de atributos
yelp_bz <- yelp_bz_raw %>%
select_if(~is.numeric(.)) %>%
mutate_all(~replace(., is.na(.), 0))
glimpse(yelp_bz)
## Rows: 14,962
## Columns: 34
## $ latitude [3m[38;5;246m<dbl>[39m[23m 43.62661, 43.64041, 43.61129, 43.70441, 43.6709…
## $ longitude [3m[38;5;246m<dbl>[39m[23m -79.50209, -79.39058, -79.55687, -79.37511, -79…
## $ review_count [3m[38;5;246m<dbl>[39m[23m 4, 81, 3, 3, 4, 6, 10, 52, 14, 4, 4, 11, 7, 3, …
## $ stars [3m[38;5;246m<dbl>[39m[23m 2.0, 2.5, 1.0, 5.0, 3.0, 4.5, 3.0, 2.5, 3.5, 2.…
## $ AcceptsInsurance [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ AgesAllowed [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Alcohol [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ BYOB [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ BikeParking [3m[38;5;246m<dbl>[39m[23m 2, 1, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 2, 0, 1, 0,…
## $ BusinessAcceptsCreditCards [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,…
## $ ByAppointmentOnly [3m[38;5;246m<dbl>[39m[23m 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Caters [3m[38;5;246m<dbl>[39m[23m 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,…
## $ CoatCheck [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Corkage [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,…
## $ DogsAllowed [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ DriveThru [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GoodForDancing [3m[38;5;246m<dbl>[39m[23m 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GoodForKids [3m[38;5;246m<dbl>[39m[23m 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 2, 0,…
## $ HappyHour [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ HasTV [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 2,…
## $ NoiseLevel [3m[38;5;246m<dbl>[39m[23m 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ OutdoorSeating [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 0, 0, 0, 2, 1, 2, 0, 0, 2, 1, 1, 1, 0,…
## $ RestaurantsAttire [3m[38;5;246m<dbl>[39m[23m 0, 3, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, 0, 0,…
## $ RestaurantsDelivery [3m[38;5;246m<dbl>[39m[23m 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 2, 0,…
## $ RestaurantsGoodForGroups [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 0,…
## $ RestaurantsPriceRange2 [3m[38;5;246m<dbl>[39m[23m 2, 2, 0, 0, 2, 0, 2, 1, 2, 0, 0, 1, 2, 0, 2, 0,…
## $ RestaurantsReservations [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 1, 0,…
## $ RestaurantsTableService [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,…
## $ RestaurantsTakeOut [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 0, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2,…
## $ Smoking [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WheelchairAccessible [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0,…
## $ WiFi [3m[38;5;246m<dbl>[39m[23m 0, 3, 0, 0, 0, 0, 3, 3, 3, 0, 0, 3, 3, 0, 0, 0,…
## $ tips_counter_bz [3m[38;5;246m<dbl>[39m[23m 0, 14, 0, 0, 0, 1, 4, 5, 5, 0, 0, 6, 0, 1, 3, 3…
## $ total_compliments_bz [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
Será aplicada uma análise de componentes principais para entender a variabilidade da nota dos estabelecimentos considerando seus atributos.
rec_pca <- recipe(stars ~ ., yelp_bz) %>%
update_role(contains('id'), new_role = 'id') %>%
#step_date(date_rv, yelping_since_usr, features = c("dow", "month","year")) %>%
#step_other(categories, threshold = 0.005) %>%
#step_other(postal_code, threshold = 0.01) %>%
#step_dummy(all_nominal(), -'business_id',-'user_id',-'name_bz') %>%
step_normalize(all_numeric(), -all_outcomes()) %>%
step_pca(all_numeric(), -all_outcomes()) %>%
step_naomit(all_numeric()) %>%
prep()
yelp_bz_pca <- juice(rec_pca)
variance_pct <- rec_pca$steps[[2]]$res
(cumsum(variance_pct$sdev^2) / sum(variance_pct$sdev^2))
## [1] 0.2795283 0.3569530 0.4181779 0.4760438 0.5246121 0.5650128 0.6019585 0.6384455
## [9] 0.6714870 0.7020952 0.7307504 0.7590915 0.7806310 0.8013364 0.8214466 0.8408425
## [17] 0.8588443 0.8747742 0.8894284 0.9032762 0.9169327 0.9301989 0.9408237 0.9507912
## [25] 0.9598340 0.9678033 0.9751966 0.9818828 0.9879589 0.9930369 0.9974065 1.0000000
## [33] 1.0000000
fviz_eig(variance_pct, addlabels = TRUE) +
labs(x = "Componente Principal",
y = "Percentual explicado da variância")
Mais de 50% da variabilidade é explicada pelas 5 primeiras componentes, que são compostas da seguinte forma:
tidy_pca <- tidy(rec_pca, 2)
tidy_pca %>%
filter(component %in% paste0("PC", 1:6)) %>%
group_by(component) %>%
top_n(15, abs(value)) %>%
ungroup() %>%
mutate(terms = reorder_within(terms, abs(value), component)) %>%
ggplot(aes(abs(value), terms, fill = value > 0)) +
geom_col() +
facet_wrap(~component, scales = "free_y") +
scale_y_reordered() +
labs(
x = "Valor absoluto da contribuição",
y = NULL, fill = "Valor > 0")
Na primeira componente, os pesos são igualmente distribuídos, o que indica que todos os atributos tem impacto semelhante na maior parte da variabilidade.
Pela segunda componente, no entanto, observa-se que a existência de um local para deixar o casaco, ser permitido fumar e ser um bom local para dançar são mais relevante, assim como a localização (PC6). Além disso, cobrança de rolha e necessidade de levar a bebida também são drivers importantes, pois aparecem em mais de uma componente.
variance_pct %>%
fviz_pca_var(axes = c(1,2), col.var="contrib", gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
Os maiores contrastes são entre a modalidade de atendimento dos restaurante: Apenas delivery ou com reservas.
Classificação dos usuários conforme o perfil.
Será utilizada a clusterização k-médias por conta da quantidade de características dos usários presentes na base. Também foi tentada a aplicação de uma clusterização hierárquica, mas os resultados obtidos não foram tão interpretáveis como os seguintes.
set.seed(123)
glimpse(yelp_users)
## Rows: 119,792
## Columns: 23
## $ user_id [3m[38;5;246m<chr>[39m[23m "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw", "-C…
## $ average_stars [3m[38;5;246m<dbl>[39m[23m 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.48, 4…
## $ compliment_cool [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_cute [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_funny [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_hot [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ compliment_list [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_more [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_note [3m[38;5;246m<dbl>[39m[23m 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ compliment_photos [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ compliment_profile [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_writer [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cool [3m[38;5;246m<dbl>[39m[23m 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9, 0, 0…
## $ elite_count [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ fans [3m[38;5;246m<dbl>[39m[23m 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ friends_count [3m[38;5;246m<dbl>[39m[23m 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1, 79, …
## $ funny [3m[38;5;246m<dbl>[39m[23m 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5, 0, 0…
## $ review_count_usr [3m[38;5;246m<dbl>[39m[23m 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, 18, 5…
## $ useful [3m[38;5;246m<dbl>[39m[23m 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0, 19,…
## $ year_since [3m[38;5;246m<dbl>[39m[23m 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 2014, 2…
## $ tips_counter [3m[38;5;246m<dbl>[39m[23m 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ total_compliments [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
yelp_pad <- yelp_users %>%
select(-user_id) %>%
scale()
kclusts <- tibble(k = 1:30) %>%
mutate(kclust = map(k, ~kmeans(yelp_pad, .x)),
tidied = map(kclust, tidy),
glanced = map(kclust, glance),
augmented = map(kclust, augment, yelp_pad)
)
clusters <- kclusts %>%
unnest(cols = c(tidied))
assignments <- kclusts %>%
unnest(cols = c(augmented))
clusterings <- kclusts %>%
unnest(cols = c(glanced))
### Cotovelo
clusterings %>%
ggplot(aes(k, tot.withinss)) +
geom_point(size = 3) +
geom_line() +
labs(y = "total within sum of squares", x = "k") +
scale_x_continuous(breaks = 1:30)
Pelo gráfico do cotovelo, poderiam ser selecionado um número de clusters (k) de 11 a 17, a seguir é possível ver uma comparacão em relação às dicas.
#k-means
assignments %>%
filter(k %in% paste0(10:20)) %>%
ggplot(aes(x = tips_counter, y = total_compliments)) +
geom_point(aes(color = .cluster), alpha = 0.5) +
facet_wrap(~ k, nrow = 3)
Para a classificação final dos usuário, será feita novamente a clusterização, mas considerando apenas o k ideal.
set.seed(123)
kmeans_usr <- kmeans(yelp_pad, 11)
yelp_usr_cluster <- yelp_users %>%
mutate(cluster_usr = kmeans_usr$cluster)
glimpse(yelp_usr_cluster)
## Rows: 119,792
## Columns: 24
## $ user_id [3m[38;5;246m<chr>[39m[23m "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw", "-C…
## $ average_stars [3m[38;5;246m<dbl>[39m[23m 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.48, 4…
## $ compliment_cool [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_cute [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_funny [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_hot [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ compliment_list [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_more [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_note [3m[38;5;246m<dbl>[39m[23m 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ compliment_photos [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ compliment_profile [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_writer [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cool [3m[38;5;246m<dbl>[39m[23m 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9, 0, 0…
## $ elite_count [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ fans [3m[38;5;246m<dbl>[39m[23m 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ friends_count [3m[38;5;246m<dbl>[39m[23m 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1, 79, …
## $ funny [3m[38;5;246m<dbl>[39m[23m 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5, 0, 0…
## $ review_count_usr [3m[38;5;246m<dbl>[39m[23m 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, 18, 5…
## $ useful [3m[38;5;246m<dbl>[39m[23m 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0, 19,…
## $ year_since [3m[38;5;246m<dbl>[39m[23m 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 2014, 2…
## $ tips_counter [3m[38;5;246m<dbl>[39m[23m 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ total_compliments [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cluster_usr [3m[38;5;246m<int>[39m[23m 3, 7, 10, 9, 3, 9, 4, 9, 4, 10, 3, 10, 4, 3, 7, 9, 3, 1…
Pelo gráfico, observa-se claramente a divisão dos usuários em relação ao tempo na plataforma, a nota média e a quantidade de fãs.
plot_ly(yelp_usr_cluster, x = ~year_since,
y = ~average_stars,
z = ~fans, color = ~cluster_usr,
text = ~paste('Cluster: ', cluster_usr)) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'No Yelp desde'),
yaxis = list(title = 'Nota Média'),
zaxis = list(title = 'Quantidade de fãs')))
yelp_usr_cluster %>%
select(user_id, cluster_usr) %>%
write.csv(file = "output/usr_cluster.csv")
Como próximos passos, seria interessante entnder melhor as características de cada cluster. Para classificar usuário que não estão na base, será utilizado um modelo de árvore para fazer a classificação. O intuito é obter de uma forma rápida o cluster de um novo usuário.
user_cluster_tree <- yelp_usr_cluster %>%
select(-user_id) %>%
rpart(cluster_usr ~ ., data = .)
plot_arvore <- as.party(user_cluster_tree)
#plot(plot_arvore)
yelp_rv <- yelp_raw %>%
#mutate(line = row_number()) %>%
select(-'year_rv') %>%
mutate(stars_rv = replace(stars_rv >= 4,1,0)) %>%
select_if(is.numeric) #%>% sample_frac(0.50)
glimpse(yelp_rv)
## Rows: 219,462
## Columns: 58
## $ average_stars [3m[38;5;246m<dbl>[39m[23m 1.00, 3.21, 1.50, 3.99, 4.23, 3.64, 3.67, 3.91,…
## $ compliment_cool [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 4, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, 0, 4,…
## $ compliment_cute [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_funny [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 4, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, 0, 4,…
## $ compliment_hot [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 3,…
## $ compliment_list [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_more [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ compliment_note [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,…
## $ compliment_photos [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_plain [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 2, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 10…
## $ compliment_profile [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_writer [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 1,…
## $ cool [3m[38;5;246m<dbl>[39m[23m 0, 2, 0, 35, 45, 1, 0, 37, 0, 0, 17, 0, 8, 0, 0…
## $ elite_count [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ fans [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 7, 0, 0, 9, 0, 0, 4, 0, 1, 0, 0, 2,…
## $ friends_count [3m[38;5;246m<dbl>[39m[23m 12, 1, 1, 44, 31, 8, 1, 147, 1, 20, 34, 1, 3, 1…
## $ funny [3m[38;5;246m<dbl>[39m[23m 0, 1, 0, 18, 27, 0, 1, 20, 0, 0, 2, 0, 9, 0, 0,…
## $ review_count_usr [3m[38;5;246m<dbl>[39m[23m 1, 19, 2, 105, 279, 14, 3, 190, 2, 4, 38, 1, 13…
## $ useful [3m[38;5;246m<dbl>[39m[23m 0, 1, 1, 90, 99, 1, 1, 100, 1, 1, 19, 1, 53, 0,…
## $ year_since [3m[38;5;246m<dbl>[39m[23m 2017, 2016, 2017, 2011, 2017, 2017, 2015, 2014,…
## $ tips_counter [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 1, 1, 5, 1, 9, 0, 1, 0, 0, 5, 0, 0, 16…
## $ total_compliments [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ cluster_usr [3m[38;5;246m<dbl>[39m[23m 7, 13, 7, 11, 12, 13, 3, 12, 13, 7, 9, 7, 3, 13…
## $ stars_rv [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,…
## $ latitude [3m[38;5;246m<dbl>[39m[23m 43.64041, 43.64041, 43.64041, 43.64041, 43.6404…
## $ longitude [3m[38;5;246m<dbl>[39m[23m -79.39058, -79.39058, -79.39058, -79.39058, -79…
## $ review_count [3m[38;5;246m<dbl>[39m[23m 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81,…
## $ stars [3m[38;5;246m<dbl>[39m[23m 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.…
## $ AcceptsInsurance [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ AgesAllowed [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Alcohol [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ BYOB [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ BikeParking [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ BusinessAcceptsCreditCards [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ ByAppointmentOnly [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Caters [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ CoatCheck [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ Corkage [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ DogsAllowed [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ DriveThru [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ GoodForDancing [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ GoodForKids [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ HappyHour [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ HasTV [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ NoiseLevel [3m[38;5;246m<dbl>[39m[23m 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,…
## $ OutdoorSeating [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsAttire [3m[38;5;246m<dbl>[39m[23m 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ RestaurantsDelivery [3m[38;5;246m<dbl>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ RestaurantsGoodForGroups [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsPriceRange2 [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsReservations [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ RestaurantsTableService [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ RestaurantsTakeOut [3m[38;5;246m<dbl>[39m[23m 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ Smoking [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WheelchairAccessible [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ WiFi [3m[38;5;246m<dbl>[39m[23m 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,…
## $ tips_counter_bz [3m[38;5;246m<dbl>[39m[23m 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,…
## $ total_compliments_bz [3m[38;5;246m<dbl>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
split <- initial_split(yelp_rv, prop = 0.8 , strata = stars_rv)
train_val <- training(split)
split_val <- initial_split(train_val, prop = 0.5, strata = stars_rv)
yelp_train <- training(split_val)
yelp_val <- testing(split_val)
yelp_test <- testing(split)
mean <- yelp_train %>%
select(-stars_rv) %>%
apply(., 2, mean)
std <- yelp_train %>%
select(-stars_rv) %>%
apply(., 2, sd)
x_train <- yelp_train %>%
select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
dim(x_train)
## [1] 87786 57
y_train <- yelp_train %>%
select(stars_rv) %>%
as.matrix()
x_val <- yelp_val %>%
select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
dim(x_val)
## [1] 87785 57
y_val <- yelp_val %>%
select(stars_rv) %>%
data.matrix()
dim(x_val)
## [1] 87785 57
x_test <- yelp_test %>%
select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
dim(x_test)
## [1] 43891 57
y_test <- yelp_test %>%
select(stars_rv) %>%
data.matrix()
rm(yelp_nn)
yelp_nn <- keras_model_sequential() %>%
layer_dense(units = 30, activation = "tanh", input_shape = ncol(x_train)) %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 16, activation = "relu") %>%
#layer_dropout(rate = 0.5) %>%
layer_dense(units = 16, activation = "relu") %>%
#layer_dense(units = 6, activation = "softmax")
layer_dense(units = 1, activation = "sigmoid")
yelp_nn %>%
compile(optimizer = "rmsprop",
#loss = "sparse_categorical_crossentropy",
loss = "binary_crossentropy",
metrics = c("accuracy"))
history <- yelp_nn %>%
fit(x_train, y_train,
epochs = 40, batch_size = 512,
validation_data = list(x_val, y_val))
plot(history)
## `geom_smooth()` using formula 'y ~ x'
keras::get_weights(yelp_nn)
## [[1]]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.6094545722 -0.4749614298 0.3541086018 -0.4428369701 -0.655121207
## [2,] -0.0394935459 -0.0203993767 -0.1796677858 0.0410629697 0.235627651
## [3,] 0.1150243357 -0.1785386056 0.0938132852 0.2151044756 -0.203980863
## [4,] 0.1204489470 0.1927195936 0.1893769354 0.2431167513 0.237795889
## [5,] -0.1355855465 0.1677446961 0.1107519418 -0.0383688547 0.097957835
## [6,] -0.1350986511 -0.0802458376 0.1391153187 -0.2267953604 0.281118870
## [7,] -0.1834388673 -0.1196382940 0.1005086154 -0.0146540077 -0.133926347
## [8,] -0.0952792019 0.1544742584 -0.2102304250 -0.2317446619 0.097554296
## [9,] -0.1308181882 -0.0727197677 -0.0194583517 -0.1283243895 -0.063994192
## [10,] 0.1468087435 -0.2453448027 0.1376343518 0.1514547914 -0.336096257
## [11,] 0.1967902184 0.1673227400 0.1810142547 0.2003618628 0.094515443
## [12,] -0.0111451307 -0.1536421180 0.1011863351 -0.2090851218 0.189533710
## [13,] 0.0747257397 -0.0180378761 0.1988334507 0.0174768157 0.057531830
## [14,] -0.0511272773 -0.0033279194 0.0958851576 -0.0243877731 -0.160894617
## [15,] -0.1030690670 0.1699993312 -0.1041736081 0.1925117522 0.061366778
## [16,] -0.0002413686 -0.0200763997 0.0559247844 -0.0111677572 -0.022314997
## [17,] 0.1810127795 0.0756299868 0.0665282831 -0.0539779700 -0.057382375
## [18,] 0.0592285581 0.0620267503 -0.2536863089 -0.0653754324 0.197270438
## [19,] -0.0230310708 -0.0228785779 0.1963366419 -0.0242696255 -0.163470387
## [20,] -0.0660066232 0.0107743675 0.0233811066 -0.0547488891 -0.063670598
## [21,] -0.0003246044 -0.0106878700 0.0441279933 -0.0174635574 -0.167306900
## [22,] 0.1243580952 0.0525388010 -0.1667653620 0.0599543974 0.213982433
## [23,] -0.0364245698 0.0085796192 0.0120518366 -0.0001108671 0.019147670
## [24,] 0.0449368320 -0.0372285992 0.0021126377 -0.0032062093 -0.031016760
## [25,] -0.0257117711 0.0339759737 0.0275775362 0.0202764496 0.015631739
## [26,] 0.0435284376 0.0673498660 0.2094453722 0.0074568819 -0.118998326
## [27,] 0.3811446726 -0.6182153821 0.6504760981 -0.1554894447 -0.336833626
## [28,] 0.0901296288 -0.0315203555 0.0722742900 -0.0354919881 0.006983003
## [29,] -0.0115256310 0.0609332919 -0.0029702515 0.0420379825 -0.361402273
## [30,] 0.0116669172 0.0189346205 -0.1175204739 -0.0113584073 0.029726485
## [31,] -0.0100175468 -0.0637853891 0.0558892600 0.0198947191 -0.187102616
## [32,] 0.0195637234 0.0504271016 0.0308048837 0.0322431400 0.025857905
## [33,] 0.0187816266 0.0051208157 0.0070411614 0.0050138412 0.037859101
## [,6] [,7] [,8] [,9] [,10]
## [1,] 0.4371570349 -0.549427748 -3.868998e-01 0.405837446 0.3706296086
## [2,] 0.0523206964 -0.089967407 9.482827e-02 -0.194767714 -0.0493119918
## [3,] -0.2296333015 0.208597079 -1.781916e-01 0.196854204 -0.3198908865
## [4,] -0.2835386395 0.123967148 1.170011e-01 0.013133104 0.2885253727
## [5,] -0.1260014921 -0.252608389 -1.375182e-01 -0.209502280 -0.1215947941
## [6,] -0.1164530739 -0.041656960 1.781321e-01 0.021474242 -0.3044258952
## [7,] 0.2701379359 0.045553014 -1.200169e-01 -0.045938756 0.1225088462
## [8,] -0.0801308826 0.072096616 1.932938e-02 0.120933078 -0.0132030519
## [9,] -0.3159334660 -0.242813870 8.762016e-02 0.038546912 -0.1922984868
## [10,] -0.1963677555 -0.075676203 -1.673460e-01 0.007502505 -0.0474456102
## [11,] 0.2045760155 0.176992431 -8.338238e-02 -0.081978336 -0.2257487625
## [12,] -0.2124243081 0.233003959 -9.562983e-02 -0.025574611 -0.2030904591
## [13,] 0.1450067908 0.049171355 1.562534e-02 0.145802513 -0.0742091089
## [14,] 0.0481342450 0.373851746 3.702985e-02 -0.002289793 -0.0307355747
## [15,] -0.1546295434 0.081347331 -5.477034e-04 -0.044991374 -0.0458727926
## [16,] 0.0293789580 0.364131451 4.198668e-02 -0.069224298 -0.1014279425
## [17,] -0.0094344169 0.139708593 -3.092309e-02 -0.140755132 0.4060024917
## [18,] 0.0259873960 -0.241443291 -2.368002e-02 -0.004487220 0.1961882114
## [19,] -0.0006819399 0.254184008 1.072929e-01 0.155561551 0.0436611250
## [20,] 0.0264473986 -0.049973533 3.007169e-02 -0.098555148 -0.0347826928
## [21,] 0.0748036727 -0.066090897 -6.072568e-03 -0.033720732 0.1098008752
## [22,] 0.0226566643 -0.326154292 2.673029e-02 -0.090960182 -0.2199163139
## [23,] 0.0311658382 0.015701128 -1.953179e-02 0.110052265 0.0807325244
## [24,] -0.0046645310 -0.021014942 -2.701281e-02 -0.017125441 0.0196815114
## [25,] 0.0233575180 -0.029191423 3.468003e-02 -0.023488579 -0.0293543097
## [26,] -0.0615089722 0.070390500 7.595991e-02 -0.054819535 -0.0838055238
## [27,] 0.5328783989 -0.398405194 -2.506301e-01 0.243116111 0.1920093447
## [28,] 0.0560581051 -0.012278790 1.283942e-02 -0.025038337 -0.0206865184
## [29,] -0.1921747625 -0.184246510 1.339100e-02 -0.015998797 -0.0256808102
## [30,] 0.0579077415 -0.182403922 -3.786369e-02 0.114237718 0.1584116817
## [31,] 0.0576090589 -0.129636660 3.967950e-03 -0.031206505 -0.2018909156
## [32,] 0.0105164815 0.003837890 4.117323e-02 -0.003388634 0.0009648166
## [33,] -0.0432431996 0.013813736 2.640472e-02 -0.024962518 -0.0607946031
## [,11] [,12] [,13] [,14] [,15]
## [1,] -0.712211490 -0.814891696 -4.627850e-01 -0.4221532643 4.964411e-01
## [2,] 0.003752558 -0.163875610 1.697819e-01 0.1219822839 1.495462e-01
## [3,] -0.395516247 -0.134433702 -1.326848e-01 -0.3073993623 4.475738e-02
## [4,] -0.053086538 0.028254971 -7.681192e-02 -0.3050606847 -2.329542e-02
## [5,] -0.176918015 0.217227578 1.397067e-01 0.2211650312 -1.381306e-01
## [6,] -0.118044302 0.042316027 2.703551e-02 0.0583707243 1.227634e-01
## [7,] 0.216732129 0.020869369 -1.027057e-01 0.1557499021 -9.008349e-02
## [8,] 0.173784196 -0.061088718 -2.553344e-01 -0.0295583904 3.459723e-03
## [9,] 0.018508460 -0.175199211 1.421986e-01 -0.2569884360 1.214819e-01
## [10,] 0.082435526 0.141205803 -2.661248e-02 0.1158676893 4.850834e-02
## [11,] 0.141190588 0.029337367 -1.745256e-01 -0.0171205103 -6.709188e-02
## [12,] 0.318803042 0.027894434 -2.302154e-02 0.1334467083 1.858229e-02
## [13,] -0.126403242 -0.097423606 1.249164e-02 -0.0165321343 -1.633328e-01
## [14,] 0.363193780 -0.109778985 -1.486473e-02 0.0500467047 -9.483209e-03
## [15,] 0.006183926 -0.049541455 7.725440e-03 -0.1349606216 -1.938693e-02
## [16,] -0.134588003 -0.065045215 -6.500528e-02 -0.0876507685 2.208129e-02
## [17,] 0.125324413 0.129353851 -2.383952e-01 0.1436266154 -8.790759e-02
## [18,] -0.068014719 0.074475139 -3.262171e-02 -0.0430050902 -3.569449e-02
## [19,] -0.175914660 0.145224795 9.274055e-02 0.0898520648 1.324165e-01
## [20,] -0.133582249 -0.038385905 -5.717372e-02 0.0268184878 -1.583815e-02
## [21,] 0.083862863 -0.112628222 9.954699e-02 -0.1008881330 -7.552805e-02
## [22,] 0.113533318 0.286359876 3.438710e-02 0.0653542578 -4.875514e-02
## [23,] -0.069520079 0.079893745 4.156201e-02 -0.0070015914 8.301085e-03
## [24,] 0.055608947 0.052050248 -2.159952e-02 0.0090787429 1.118529e-02
## [25,] -0.103429146 -0.051067084 2.390502e-02 -0.0452804379 6.882745e-03
## [26,] -0.154529229 0.076968022 1.548869e-01 -0.0234221518 1.418374e-01
## [27,] -0.080818892 -0.238186851 -2.097901e-01 -0.1325931698 4.622724e-01
## [28,] -0.049345657 -0.068211168 -2.736917e-02 0.0247267243 3.361655e-02
## [29,] 0.004388327 -0.002416073 -2.716474e-03 0.6005147099 4.814963e-03
## [30,] 0.159975827 -0.011521813 -2.145228e-02 0.0624429435 -8.652613e-02
## [31,] -0.071347289 0.063049249 -6.296512e-02 0.0312637240 5.577638e-03
## [32,] -0.081594542 0.096100681 1.897288e-02 0.0057865889 1.770914e-02
## [33,] 0.024433728 0.013047420 6.315423e-02 0.0295099635 9.849215e-03
## [,16] [,17] [,18] [,19] [,20]
## [1,] 0.729519248 -0.4654282033 0.6145101786 -5.230303e-01 0.404629558
## [2,] -0.004454987 0.0532902069 -0.2886953056 -1.309147e-01 -0.065638825
## [3,] 0.173774227 -0.1321456581 0.0710461140 -3.485890e-03 -0.034064066
## [4,] 0.145125508 -0.1716645509 0.1654713005 2.057486e-01 -0.029826207
## [5,] -0.003747611 0.1350643933 0.0993557647 -2.103981e-02 0.085773699
## [6,] 0.134351239 -0.0820732787 -0.1958646923 -8.698031e-02 0.102243073
## [7,] -0.169470713 -0.1753619909 -0.1263721138 7.481533e-02 0.167956844
## [8,] -0.109498024 0.2787225246 0.1476476640 8.098894e-02 0.054857537
## [9,] 0.062124364 -0.0796455517 -0.0070934203 1.331812e-01 -0.238949537
## [10,] 0.208020911 -0.2221402228 0.0194950998 -9.073591e-03 -0.037457652
## [11,] -0.075304605 -0.0308459233 0.1307676584 8.391162e-02 0.174672693
## [12,] -0.016913688 -0.0390206575 -0.0773258507 -9.717230e-02 -0.165651381
## [13,] -0.058870152 0.1769033968 0.1474724859 -2.443802e-01 -0.085037738
## [14,] -0.057192788 -0.0006478874 0.0270918347 -2.848697e-02 0.017888863
## [15,] 0.111433178 0.0412853882 0.1174396500 1.713726e-02 -0.015405086
## [16,] 0.030472958 0.0248477180 -0.0391061865 -1.172271e-02 0.187212467
## [17,] -0.036773186 0.1739047021 0.2563164532 -4.634838e-02 -0.013424500
## [18,] 0.054487642 -0.1516308039 -0.0713790730 1.363569e-01 -0.193462729
## [19,] -0.009195495 0.0108222049 0.0386067927 8.010945e-02 0.264992446
## [20,] 0.012713622 -0.0024407080 -0.0201236960 3.243012e-02 -0.125032574
## [21,] -0.013368972 0.0958308280 -0.0161917042 5.893053e-02 0.013056211
## [22,] 0.116157219 -0.0707323328 0.0534757227 -1.633515e-01 0.046397094
## [23,] -0.021190075 -0.0571015254 0.0270981286 -2.595115e-02 -0.039416734
## [24,] 0.066619299 0.0138028972 0.0588065647 -3.545853e-02 0.041930404
## [25,] 0.013491044 0.0224835705 -0.0332784057 -2.427808e-02 -0.038671494
## [26,] -0.179629818 -0.1128255203 0.0715885758 -3.118518e-02 -0.078097954
## [27,] 0.279059350 -0.6783709526 0.2832680941 -4.470558e-01 0.651077688
## [28,] 0.002812791 0.0023631756 0.0107442439 -3.474668e-02 0.044437643
## [29,] 0.555992782 -0.0300672445 0.4324532449 -3.584648e-02 -0.421870410
## [30,] -0.047303200 0.0584600084 0.0606797189 8.731254e-02 0.006997220
## [31,] 0.002551844 0.0417271927 -0.0005584795 4.409665e-03 -0.082695089
## [32,] -0.016753154 -0.0642376095 -0.0155083248 -5.260266e-02 -0.107592255
## [33,] 0.028681222 -0.0017997429 -0.0130134020 -3.148418e-03 -0.030467739
## [,21] [,22] [,23] [,24] [,25]
## [1,] -0.3680885136 -0.882711530 0.3355892599 0.4466132522 -0.6353719831
## [2,] 0.0624390021 0.004909080 0.1503461301 -0.0781597272 -0.1896225661
## [3,] 0.0559851378 -0.024810567 0.0572828688 0.0461744368 0.2619299293
## [4,] -0.2254979610 0.183361381 0.0463441350 0.0584993586 0.2573947608
## [5,] -0.1350592375 -0.087268651 0.0175623465 -0.0704893544 -0.0831288770
## [6,] 0.4114254117 -0.212691322 -0.0791730583 -0.0579334423 0.2607204616
## [7,] 0.0664711446 -0.343663871 -0.0485427082 -0.0594763421 -0.2685116529
## [8,] 0.0747417137 -0.027526913 -0.1835471094 0.0626802146 -0.1626054049
## [9,] -0.1591280997 -0.207008615 0.0937459543 0.0849606395 0.0113023007
## [10,] -0.3139918149 -0.013878560 -0.0898885429 0.1633947641 -0.0541496873
## [11,] -0.1042657048 -0.030535419 -0.0858046040 0.0965495780 0.0624740906
## [12,] 0.1701129079 0.351063460 0.0890370384 0.0040065814 -0.0632404089
## [13,] 0.0456270128 -0.101292893 -0.0614905544 -0.1011991799 0.0405284464
## [14,] 0.1203461811 -0.103685878 -0.0313399024 -0.1330443770 0.0897198692
## [15,] 0.1987297088 0.109246142 0.0617827810 -0.0083231479 0.2270543426
## [16,] 0.0263581257 -0.018610211 0.3136822581 -0.0930772647 0.1616401076
## [17,] 0.0107498039 -0.128670648 -0.1241332442 -0.2704400420 -0.0477555208
## [18,] -0.2602904141 -0.028312622 -0.0552988015 0.2057229131 -0.0640010610
## [19,] 0.1337099373 -0.247681186 -0.0206083953 0.1539038718 0.1677477658
## [20,] 0.0990961641 -0.083855487 -0.0913274512 -0.0056753457 -0.0714150220
## [21,] -0.0254487991 0.023942169 -0.0749411359 0.0382329486 0.0080905873
## [22,] -0.0290326513 0.078796923 0.1545740068 -0.2348472476 -0.1423379183
## [23,] -0.0195809733 0.060195632 0.0340739228 0.0192825738 -0.0762929171
## [24,] 0.0311685856 -0.028782271 0.0416885912 0.0239278264 -0.0604248084
## [25,] 0.0032830751 -0.084785134 0.0056296163 -0.0001479712 -0.0143211288
## [26,] 0.1091492772 0.013628869 -0.0410011485 0.1119457558 0.0175196454
## [27,] -0.2237438709 -0.109332383 0.7220876813 0.4136667252 -0.6096450090
## [28,] 0.0237390697 -0.017343374 0.0502511784 0.0266055129 -0.0091913538
## [29,] 0.5266930461 -0.008614014 0.0288121570 -0.4168978631 -0.3681140542
## [30,] 0.0532448329 -0.028833941 -0.1195237041 -0.0558011681 -0.1274895668
## [31,] -0.0333146304 -0.246807039 0.0234096237 0.0342662632 -0.0878212303
## [32,] 0.0132465381 0.001084760 -0.0313191786 -0.0701729953 0.0241898596
## [33,] -0.0145956855 0.039887041 -0.0225421954 0.0058204238 0.0057638520
## [,26] [,27] [,28] [,29] [,30]
## [1,] 0.786992133 -0.4199651182 -0.6453723907 0.413181275 -0.592337310
## [2,] 0.023518993 0.1333266646 0.0062887268 -0.286748946 -0.226348698
## [3,] -0.140591189 -0.0559961386 -0.0637828335 -0.236449122 -0.009940902
## [4,] -0.179455787 -0.0763119981 -0.1757344753 0.059410382 0.201968879
## [5,] 0.065350711 -0.1417763680 0.1418754160 -0.114062257 0.035366453
## [6,] 0.239979133 -0.0471462272 -0.0898081586 -0.073613748 -0.102842510
## [7,] -0.155579731 -0.1221001372 0.1170393452 -0.122735851 0.180331156
## [8,] -0.058924451 -0.0088449009 0.1321617365 0.208066896 -0.181331381
## [9,] -0.225022912 -0.1813999116 -0.0628715381 -0.169594198 0.152810901
## [10,] 0.081566341 -0.0032833014 -0.1415512562 0.119351767 0.107442714
## [11,] -0.215007395 0.3074276745 -0.0036737623 -0.105892494 0.156690538
## [12,] -0.144472018 0.0791129619 0.1493418068 0.020660473 -0.149570882
## [13,] 0.343043655 -0.0573783070 -0.0349638350 -0.159750924 -0.020763678
## [14,] 0.022276012 -0.0147032123 -0.0149039906 0.110731550 -0.039397981
## [15,] 0.004534894 0.0813782662 -0.0871411487 -0.180779532 -0.147106886
## [16,] -0.001338653 0.0314606689 -0.0299415495 -0.339367270 0.055473525
## [17,] 0.154076636 0.0083227297 -0.0878422335 -0.212868705 0.027361661
## [18,] 0.087198228 0.0803213418 0.0289936457 0.234723255 0.115321316
## [19,] -0.125790656 -0.1565635204 0.0573705919 0.080314465 -0.205859244
## [20,] 0.015107184 0.1255112886 0.0411323011 -0.014875833 0.040497981
## [21,] 0.023579726 0.1095345318 -0.0751808435 -0.034047056 -0.098379388
## [22,] -0.020787979 -0.0062152846 0.1003907770 0.203302294 0.169440329
## [23,] -0.003423488 -0.0753919184 0.0107697165 0.083153233 -0.008059621
## [24,] 0.044162363 -0.0118014663 -0.0397045836 -0.012474179 0.001924350
## [25,] -0.010680303 0.0138532100 0.0516060814 -0.092330292 0.015218947
## [26,] -0.166870520 0.0170757025 -0.0686621740 0.092716753 0.104306147
## [27,] 0.150313973 -0.2784928083 -0.2209264040 0.200270861 -0.399393409
## [28,] 0.034770533 0.0100712227 -0.0161639899 -0.049843185 0.011308619
## [29,] 0.018338023 0.8713814616 -0.0079655489 -0.499762684 0.012017766
## [30,] 0.007809259 -0.0065824399 0.0303290002 0.046939999 0.050758846
## [31,] 0.019565228 -0.0268781278 -0.0151643241 0.010443625 0.011564441
## [32,] -0.012393128 0.0232871454 0.0016280233 0.025043914 -0.031051833
## [33,] -0.004652527 0.0222678464 -0.0041528922 -0.010910623 -0.093671434
## [ reached getOption("max.print") -- omitted 24 rows ]
##
## [[2]]
## [1] -0.282026708 0.086308412 0.133914471 -0.001192288 0.103316717 0.079597764
## [7] -0.009325965 -0.510441244 0.547421157 0.319709688 -0.043573927 0.246097714
## [13] -0.056712832 -0.369696021 -0.222868904 -0.339687854 -0.105103947 -0.243444875
## [19] 0.320311934 0.258757710 -0.455855787 0.249252781 -0.071390003 0.325902939
## [25] 0.126275852 -0.411053032 -0.609874368 0.347979546 0.326663256 0.264243215
##
## [[3]]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.232501313 0.2928415239 0.140565038 0.17740002 0.10006279 0.066112690
## [2,] 0.205525517 0.1418457329 0.009758664 -0.48773465 0.36523667 -0.218028933
## [3,] 0.020741394 0.0304890051 -0.123063497 0.37984690 -0.25943899 0.392152399
## [4,] -0.155277163 -0.0624808557 0.020432051 0.16997768 0.28997821 0.235081673
## [5,] 0.159783885 0.0703252107 -0.162827387 -0.06890922 0.21150997 -0.032116160
## [6,] -0.148045331 0.1159380972 -0.232500330 0.35666385 -0.30366972 0.160232663
## [7,] 0.154707178 0.0291703604 0.218087405 -0.45685944 0.25949037 0.037612703
## [8,] 0.196835354 -0.1299752146 0.045175385 0.08499885 0.46906418 0.088327512
## [9,] -0.000859863 0.0507858098 0.418575317 0.22775583 -0.24715468 -0.028326135
## [10,] -0.082640402 -0.1334992349 -0.019941386 -0.05306740 -0.05850421 -0.250088423
## [11,] 0.218846887 0.0056977342 0.049929272 -0.22007853 -0.04331355 0.096167661
## [12,] -0.112917952 -0.1980956942 -0.296761215 -0.21937050 0.34707263 0.251576781
## [13,] 0.071033612 -0.0838846564 -0.169516370 0.19255896 0.41633186 0.078964733
## [14,] 0.131340623 -0.0873627216 -0.426907361 -0.05536006 0.19281033 0.226786420
## [15,] -0.140042052 0.2809669375 0.045318451 0.13574021 0.11916186 0.317792147
## [16,] -0.058679663 0.4991959035 0.313833565 0.12535290 -0.34603131 -0.012346624
## [17,] 0.068266161 0.0527788028 -0.146560714 0.12513834 0.31015033 -0.395230949
## [18,] -0.282295495 0.1149619743 0.043742247 0.19977170 -0.11052866 0.089333013
## [19,] 0.001007761 -0.3222911954 -0.141588986 -0.08123756 0.01618466 -0.346295059
## [20,] -0.516050577 -0.0778190047 0.083206594 -0.27252418 0.04854503 0.428480864
## [21,] 0.175282553 -0.0409848653 0.164469555 -0.28640541 0.19380839 0.167012900
## [22,] 0.283727527 -0.1300850660 -0.395898968 0.26406237 0.12497630 0.223059118
## [23,] -0.304906577 0.0002150437 0.066671893 0.09744377 0.02524569 0.516375661
## [24,] -0.066698149 0.1708828509 -0.289151698 0.16707940 -0.35984218 0.020823417
## [25,] -0.115078464 -0.0027103587 0.019944256 -0.12949750 0.24461739 -0.188461855
## [26,] -0.166105106 0.2796464264 0.300099224 -0.24221039 -0.11642620 -0.181874424
## [27,] -0.037257303 -0.0642147139 0.059338130 0.23555154 0.22129133 -0.231831908
## [28,] -0.341551542 -0.7165807486 -0.133359149 0.16862960 0.13081951 0.004461672
## [29,] 0.061354119 -0.0723449141 -0.196549684 0.18271901 -0.09029669 -0.146544859
## [30,] 0.057799038 -0.1444665939 -0.127835959 0.16654998 -0.01204011 -0.271814734
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 0.21311715 0.01846009 -0.11757247 -0.243426323 -0.3862234652 -0.118649416
## [2,] -0.23126714 -0.16383663 0.11887081 0.197523654 -0.0588766374 0.002542035
## [3,] -0.14133099 0.09135992 -0.16837624 -0.148576155 -0.0887190849 -0.014054355
## [4,] 0.31898504 -0.17690499 -0.43045002 0.381945610 0.2868536711 0.098214343
## [5,] -0.03951347 -0.11831224 -0.30231673 0.383829623 -0.0738805085 0.059082240
## [6,] -0.32521757 -0.20907737 -0.40785715 0.025166268 0.1014736965 -0.346234292
## [7,] 0.03041033 0.23221712 -0.08099909 0.003636565 -0.0430878848 0.166518599
## [8,] 0.36966339 0.47302607 0.02248991 0.008880679 0.1063629240 -0.040956013
## [9,] 0.09972992 -0.51671368 0.02210282 -0.148625836 -0.5440436006 0.204965234
## [10,] -0.18252677 -0.33520475 0.08321787 -0.248535186 -0.3225192726 0.139201820
## [11,] 0.15886034 -0.04634874 -0.32505721 0.184814841 0.1771183312 -0.032010544
## [12,] -0.08538136 -0.26715699 -0.37737936 0.089217551 0.2128878087 0.098186992
## [13,] -0.04257543 0.13215458 -0.21896927 -0.189680472 0.2404793352 0.432078153
## [14,] 0.33146599 0.39867631 -0.05683200 -0.021324966 0.1083126441 -0.046648916
## [15,] 0.12385551 -0.04911806 -0.23281147 -0.451307118 -0.3546159267 -0.428412825
## [16,] 0.16581391 0.12996516 0.12464382 0.366083175 -0.0534083582 -0.058116075
## [17,] -0.02808530 0.01407588 0.11175124 -0.016355790 -0.0785713792 0.086118408
## [18,] -0.13918613 -0.28295180 0.29532924 0.104088269 0.2270492762 -0.094929419
## [19,] 0.05106626 0.25002211 0.13070023 -0.126312330 -0.0589390993 0.286906868
## [20,] -0.23301166 0.00126946 -0.43899667 0.106028162 -0.3812254667 -0.251451463
## [21,] 0.28314322 0.21671551 0.02784175 0.009237651 0.5404528379 -0.235623732
## [22,] 0.19582392 0.03794044 -0.47519511 0.133860067 -0.1277690679 0.147167236
## [23,] -0.26037890 -0.17749214 -0.22940615 0.199379802 -0.0269921292 -0.294040829
## [24,] -0.34782976 0.10589155 0.21345676 -0.161668837 -0.1334689558 0.002756604
## [25,] -0.04485632 0.07684859 -0.01600491 0.048689399 0.0008669847 0.248176157
## [26,] -0.08734338 0.02546140 0.54945630 -0.234910369 0.0205506999 -0.153706104
## [27,] 0.14280492 0.64715296 -0.24673127 -0.018834081 0.5944663882 -0.010605807
## [28,] 0.14914832 0.16687293 -0.15919809 -0.301595658 0.1982628405 0.360702783
## [29,] -0.37942412 -0.08501843 0.18151483 -0.230215773 -0.3724642098 0.159492224
## [30,] 0.25246176 0.10979345 -0.09463172 0.389581740 -0.2762593627 -0.181715548
## [,13] [,14] [,15] [,16]
## [1,] 0.23845367 0.391559780 0.37299955 -0.04766696
## [2,] -0.27139577 -0.104887053 0.21994556 0.09419525
## [3,] -0.39177430 -0.241600037 0.15973774 0.01045218
## [4,] -0.03676064 -0.396023393 -0.43245903 0.10610139
## [5,] -0.23092407 -0.099788874 -0.07021088 0.04591432
## [6,] -0.10818826 0.035178673 0.16942944 0.11824833
## [7,] -0.28309378 0.024497485 -0.05137254 0.24311860
## [8,] 0.06910275 -0.381036431 0.10132520 0.04928410
## [9,] -0.09075980 0.357842714 -0.12500729 -0.37084690
## [10,] 0.35088387 0.404264748 0.25987670 0.23078181
## [11,] 0.02226515 0.327820390 -0.27323741 0.18857399
## [12,] 0.10886248 -0.270791233 -0.23130977 -0.24709608
## [13,] -0.03463710 -0.252579361 -0.07026480 -0.31591323
## [14,] -0.16237760 0.131310210 0.07766928 -0.23603871
## [15,] 0.22639863 0.001549316 0.23534317 0.02917634
## [16,] 0.30821735 0.099688768 -0.20200522 -0.14240889
## [17,] 0.12074252 -0.326333433 0.15549643 -0.22406387
## [18,] 0.35737488 0.139834136 0.17556410 -0.21544121
## [19,] -0.09059615 -0.069962151 -0.40033683 -0.28426585
## [20,] 0.09648057 0.017393710 0.06967809 0.25287643
## [21,] -0.21507838 -0.203084618 0.37212044 -0.32377043
## [22,] -0.14056782 -0.080417655 -0.21311249 -0.53956467
## [23,] -0.14297482 -0.129229724 -0.12896508 0.11350271
## [24,] -0.16924819 0.202244282 -0.21844734 -0.04844424
## [25,] -0.30253014 -0.312072754 0.42593831 -0.02449828
## [26,] 0.35514465 0.291752279 0.42608583 -0.16911922
## [27,] 0.01893734 0.256303757 0.15037212 -0.12385029
## [28,] -0.22626632 -0.231029078 -0.36930919 -0.01774714
## [29,] 0.04229600 0.169606328 0.27788422 -0.02444237
## [30,] -0.41787338 0.012206912 -0.20642003 -0.11539765
##
## [[4]]
## [1] -0.159209728 0.146082073 0.101375431 0.165886909 0.055261008 0.344417363
## [7] 0.064184308 -0.080298379 0.056914642 0.028675580 -0.097928695 0.003217116
## [13] 0.116863646 0.156676501 0.143953085 0.114630781
##
## [[5]]
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.152282700 0.038523350 0.38653836 -0.380773634 -0.15481828 -0.26896128
## [2,] -0.002409261 -0.300699145 -0.25566009 -0.054319087 -0.15619576 0.29573238
## [3,] -0.061251935 -0.525169313 -0.41466665 -0.167260155 0.26350382 -0.16835609
## [4,] -0.203771964 -0.235662818 0.03283354 0.397095561 0.10239325 0.13889283
## [5,] 0.288018912 0.202324927 0.29104206 -0.004043519 -0.27187756 -0.10990795
## [6,] -0.143009201 0.003154357 -0.46749309 -0.052855950 0.30990499 0.32147878
## [7,] -0.253284097 0.196982473 0.13386874 0.430480480 0.22529909 -0.16578899
## [8,] 0.038224529 0.114053115 0.04370245 0.087276757 -0.11211437 0.35493433
## [9,] -0.060944010 0.017792344 -0.22335069 0.302471220 0.42281753 0.47397637
## [10,] 0.026804868 0.054076541 -0.16936885 -0.082224429 -0.16018406 0.09136956
## [11,] 0.106467925 0.060271613 0.36786208 0.171622172 -0.02737989 -0.05425362
## [12,] -0.066822834 -0.414545357 -0.09838393 0.058653105 -0.07321890 -0.06187512
## [13,] 0.005708187 0.136617437 0.12932493 -0.189623669 -0.30577087 0.29000598
## [14,] -0.089629099 -0.082457550 -0.14258800 0.362704724 0.23863995 0.20772406
## [15,] 0.029941015 0.077753358 0.06202007 0.041037809 0.27838171 0.09378575
## [16,] -0.175240129 -0.184329063 0.04960376 0.156399786 -0.13394234 0.38875133
## [,7] [,8] [,9] [,10] [,11] [,12]
## [1,] 0.18644619 -0.19281806 -0.143334940 -0.43504941 -0.31829938 -0.0006827569
## [2,] -0.09701722 0.39278978 0.390160322 -0.14255829 -0.32233205 -0.5382270813
## [3,] -0.42291000 0.34145388 0.346383959 0.10115951 0.07644945 0.0558606833
## [4,] -0.19441891 -0.01399509 -0.633135498 0.42795143 -0.41413337 0.0103627946
## [5,] 0.36957794 0.38144821 -0.828845680 -0.16913752 -0.21980137 -0.1048631519
## [6,] -0.03937756 0.34377825 -0.124571137 -0.15839210 0.19164470 -0.1110875979
## [7,] -0.07707267 0.16724129 -0.350151658 0.08078595 0.32952553 -0.0707903057
## [8,] 0.31005588 -0.29557404 0.002799409 0.05246624 -0.49288648 0.3881639242
## [9,] 0.06023601 -0.23717369 -0.315916836 -0.02953328 0.27160171 -0.3752539754
## [10,] -0.12521099 -0.03396337 0.031071853 -0.26151145 0.16427286 0.2185252756
## [11,] 0.36634079 0.25566199 -0.422173470 -0.29623577 0.37052202 0.3485530913
## [12,] -0.53180194 -0.04759547 -0.184383258 -0.15032518 0.24546383 -0.1006838754
## [13,] 0.18483546 -0.16181123 -0.200188935 -0.14755033 -0.43550226 -0.2975659072
## [14,] -0.14455917 -0.10872443 -0.434421778 -0.11542961 -0.30341613 -0.2038914859
## [15,] 0.12473521 0.21254829 0.126445428 -0.27438286 0.29574955 -0.0166060813
## [16,] 0.08147255 0.19752879 0.116355114 -0.07993130 0.02051583 -0.4224003851
## [,13] [,14] [,15] [,16]
## [1,] 0.38634279 -0.0007085439 -0.156274021 0.26565817
## [2,] -0.40488371 -0.3433353007 0.457166672 0.13971929
## [3,] 0.01410800 0.1783063263 -0.296983391 0.51636040
## [4,] -0.09257359 0.0875857174 0.393532515 0.09416457
## [5,] -0.28497899 0.3067537844 -0.132819816 0.02839280
## [6,] -0.28649375 0.1775925159 -0.165738568 0.32616076
## [7,] 0.37432221 -0.1544353068 -0.356974065 -0.22735108
## [8,] 0.26928824 0.4040215611 0.269536018 -0.13866560
## [9,] -0.22109330 0.1480466127 -0.079814270 -0.05406047
## [10,] 0.25025889 0.3707785904 -0.009046328 -0.02022530
## [11,] 0.05004594 0.2267424762 0.128830954 0.34827989
## [12,] -0.23982549 0.1706580520 -0.013123713 0.09674814
## [13,] 0.08120941 -0.1147394851 0.360345364 0.44118407
## [14,] 0.08309616 0.3190802932 0.206942201 0.46079457
## [15,] -0.26101470 -0.0420643725 -0.168586209 0.21677449
## [16,] -0.12710108 0.3472642601 0.080618002 0.41166443
##
## [[6]]
## [1] -0.21097274 -0.02019890 -0.03659583 -0.11521966 0.11454046 0.19045204
## [7] -0.37585503 0.10573452 -0.15516104 0.13904567 -0.07318669 -0.20604302
## [13] -0.07853336 -0.06871726 0.14789663 0.11962701
##
## [[7]]
## [,1]
## [1,] -0.073339120
## [2,] -0.249016926
## [3,] -0.395099044
## [4,] -0.305241108
## [5,] 0.524895310
## [6,] 0.439581573
## [7,] -0.427478611
## [8,] 0.026368108
## [9,] -0.002637332
## [10,] 0.208679855
## [11,] -0.105449475
## [12,] -0.196138307
## [13,] -0.087272167
## [14,] -0.408085197
## [15,] 0.320306331
## [16,] 0.240999863
##
## [[8]]
## [1] 0.1093302
(results <- yelp_nn %>% evaluate(x_test, y_test))
## loss accuracy
## 0.4626670 0.7723907
Foi adicionada uma camada de dropout na rede neural, para diminuir o overfit do modelo. Observa-se que funcionou, pois a perda da base de validação não ultrappassa a perda da base de treino.
tibble(observado = factor(y_test)) %>%
bind_cols(data.frame(prob = predict(yelp_nn, as.matrix(x_test)))) %>%
roc_auc(observado, prob)
## # A tibble: 1 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 roc_auc binary 0.837
tibble(observado = factor(y_test)) %>%
bind_cols(data.frame(prob = predict(yelp_nn, as.matrix(x_test)))) %>%
roc_curve(observado, prob) %>%
autoplot()
Pelo gráfico, observa-se que o modelo atingiu um desempenho bom na base de teste.
Criação de um usuário e definição de seu cluster
glimpse(yelp_usr_cluster)
## Rows: 119,792
## Columns: 24
## $ user_id <chr> "-4Anvj46CWf57KWI9UQDLg", "-BUamlG3H-7yqpAl1p-msw", "-C…
## $ average_stars <dbl> 3.50, 1.50, 3.00, 3.56, 3.00, 4.00, 4.17, 3.57, 4.48, 4…
## $ compliment_cool <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_cute <dbl> 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_funny <dbl> 0, 0, 0, 0, 0, 0, 0, 169, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ compliment_hot <dbl> 0, 0, 0, 0, 0, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ compliment_list <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_more <dbl> 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_note <dbl> 0, 0, 1, 0, 0, 0, 0, 16, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ compliment_photos <dbl> 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ compliment_plain <dbl> 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, …
## $ compliment_profile <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ compliment_writer <dbl> 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ cool <dbl> 2, 0, 1, 0, 1, 0, 0, 1562, 2, 1, 1, 9, 0, 5, 0, 9, 0, 0…
## $ elite_count <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ fans <dbl> 1, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
## $ friends_count <dbl> 1, 16, 15, 27, 1, 1, 1, 338, 59, 6, 10, 100, 8, 1, 79, …
## $ funny <dbl> 0, 0, 1, 0, 0, 0, 0, 1266, 3, 1, 4, 0, 1, 1, 1, 5, 0, 0…
## $ review_count_usr <dbl> 2, 2, 4, 27, 2, 6, 6, 66, 28, 3, 8, 37, 4, 20, 1, 18, 5…
## $ useful <dbl> 2, 0, 1, 5, 1, 3, 16, 1683, 12, 1, 2, 30, 4, 30, 0, 19,…
## $ year_since <dbl> 2016, 2016, 2011, 2019, 2014, 2017, 2014, 2019, 2014, 2…
## $ tips_counter <dbl> 0, 1, 0, 0, 0, 1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 2, 0, 0, …
## $ total_compliments <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ cluster_usr <int> 3, 7, 10, 9, 3, 9, 4, 9, 4, 10, 3, 10, 4, 3, 7, 9, 3, 1…
rm(user)
compliment_max <- 50
user <- tibble(user_id = 'random_user',
average_stars = round(runif(1, 1.0, 5),2),
compliment_cool = ceiling(runif(1,0, compliment_max)),
compliment_cute = ceiling(runif(1,0, compliment_max)),
compliment_funny = ceiling(runif(1,0, compliment_max)),
compliment_hot = ceiling(runif(1,0, compliment_max)),
compliment_list = ceiling(runif(1,0, compliment_max)),
compliment_more = ceiling(runif(1,0, compliment_max)),
compliment_note = ceiling(runif(1,0, compliment_max)),
compliment_photos = ceiling(runif(1,0, compliment_max)),
compliment_plain = ceiling(runif(1,0, compliment_max)),
compliment_profile = ceiling(runif(1,0, compliment_max)),
compliment_writer = ceiling(runif(1,0, compliment_max)),
cool = ceiling(runif(1,0, compliment_max)),
elite_count = 0,
fans = ceiling(runif(1,0, compliment_max)),
friends_count = ceiling(runif(1,0, compliment_max)),
funny = ceiling(runif(1,0, compliment_max)),
review_count_usr = ceiling(runif(1,0,compliment_max)),
useful = ceiling(runif(1,0, compliment_max)),
year_since = ceiling(runif(1,2004, 2019)),
tips_counter = ceiling(runif(1,0, compliment_max)),
total_compliments = ceiling(runif(1,0, compliment_max))
)
## criação aleatória do número de anos que o usuário foi elite
user$elite_count <- ceiling(runif(1,0, (2020-user$year_since)))
#encontra o número do cluster em que o usuário se encaixa
user$cluster_usr <- user_cluster_tree %>%
predict(user) %>%
ceiling()
glimpse(user)
## Rows: 1
## Columns: 24
## $ user_id <chr> "random_user"
## $ average_stars <dbl> 4.07
## $ compliment_cool <dbl> 14
## $ compliment_cute <dbl> 17
## $ compliment_funny <dbl> 2
## $ compliment_hot <dbl> 14
## $ compliment_list <dbl> 13
## $ compliment_more <dbl> 19
## $ compliment_note <dbl> 37
## $ compliment_photos <dbl> 49
## $ compliment_plain <dbl> 46
## $ compliment_profile <dbl> 31
## $ compliment_writer <dbl> 24
## $ cool <dbl> 1
## $ elite_count <dbl> 7
## $ fans <dbl> 21
## $ friends_count <dbl> 25
## $ funny <dbl> 7
## $ review_count_usr <dbl> 16
## $ useful <dbl> 32
## $ year_since <dbl> 2010
## $ tips_counter <dbl> 15
## $ total_compliments <dbl> 24
## $ cluster_usr <dbl> 8
# seleção aleatória de estabelecimentos e notas atribuídas a cada um baseado no número de reviews
n_reviews <- user$review_count_usr
reviewed_usr <- tibble(business_id = sample(yelp_bz_raw$business_id, n_reviews), #seleçao aleatória de estabelecimentos
stars_rv = ceiling(runif(n_reviews, 1.0, 5)),
year_rv = ceiling(runif(n_reviews, 2009, 2019)),
)
user_hist <- user %>%
bind_rows(replicate(n_reviews-1, user, simplify = FALSE)) %>% #replica as informações do usuário
bind_cols(reviewed_usr) %>% #junta os estabelecimentos e notas dadas
left_join(., yelp_bz_raw, by = 'business_id') #junta as informações dos estabelecimentos
glimpse(user_hist)
## Rows: 16
## Columns: 63
## $ user_id <chr> "random_user", "random_user", "random_user", "r…
## $ average_stars <dbl> 4.07, 4.07, 4.07, 4.07, 4.07, 4.07, 4.07, 4.07,…
## $ compliment_cool <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,…
## $ compliment_cute <dbl> 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,…
## $ compliment_funny <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
## $ compliment_hot <dbl> 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,…
## $ compliment_list <dbl> 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,…
## $ compliment_more <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,…
## $ compliment_note <dbl> 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,…
## $ compliment_photos <dbl> 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49,…
## $ compliment_plain <dbl> 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,…
## $ compliment_profile <dbl> 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,…
## $ compliment_writer <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,…
## $ cool <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
## $ elite_count <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
## $ fans <dbl> 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,…
## $ friends_count <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,…
## $ funny <dbl> 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
## $ review_count_usr <dbl> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,…
## $ useful <dbl> 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,…
## $ year_since <dbl> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010,…
## $ tips_counter <dbl> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ total_compliments <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,…
## $ cluster_usr <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
## $ business_id <chr> "r-kj-kBSKFKh0sM8EVX8AA", "H7rpWv02D6WTu6IpNNDk…
## $ stars_rv <dbl> 2, 4, 5, 4, 3, 5, 3, 3, 5, 5, 2, 5, 3, 5, 4, 3
## $ year_rv <dbl> 2016, 2011, 2016, 2018, 2019, 2014, 2011, 2011,…
## $ categories <chr> "Accessories, Women's Clothing, Men's Clothing,…
## $ latitude <dbl> 43.64127, 43.65943, 43.77336, 43.64869, 43.6659…
## $ longitude <dbl> -79.43377, -79.38252, -79.49302, -79.38544, -79…
## $ name <chr> "Frances Watson", "Bed Bath and Beyond", "Aji S…
## $ review_count <dbl> 3, 28, 41, 3, 4, 4, 16, 11, 3, 42, 351, 44, 4, …
## $ stars <dbl> 5.0, 2.5, 3.0, 3.0, 4.0, 2.0, 4.0, 2.5, 2.5, 3.…
## $ AcceptsInsurance <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ AgesAllowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ Alcohol <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ BYOB <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ BikeParking <dbl> 2, 2, 2, 0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0
## $ BusinessAcceptsCreditCards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ ByAppointmentOnly <dbl> 1, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0, 0, 0, 1, 0, 1
## $ Caters <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0
## $ CoatCheck <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ Corkage <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ DogsAllowed <dbl> 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ DriveThru <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ GoodForDancing <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ GoodForKids <dbl> 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0
## $ HappyHour <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ HasTV <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ NoiseLevel <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0
## $ OutdoorSeating <dbl> 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0
## $ RestaurantsAttire <dbl> 0, 0, 1, 0, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0
## $ RestaurantsDelivery <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0
## $ RestaurantsGoodForGroups <dbl> 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0
## $ RestaurantsPriceRange2 <dbl> 3, 2, 2, 0, 2, 0, 0, 3, 0, 1, 3, 2, 0, 0, 3, 0
## $ RestaurantsReservations <dbl> 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0
## $ RestaurantsTableService <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0
## $ RestaurantsTakeOut <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 1, 0
## $ Smoking <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## $ WheelchairAccessible <dbl> 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 0, 2, 0, 0, 0, 0
## $ WiFi <dbl> 0, 0, 1, 0, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0
## $ tips_counter_bz <dbl> 0, 6, 7, 0, 4, 2, 0, 0, 1, 1, 37, 3, 4, 0, 3, 0
## $ total_compliments_bz <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
recomm_f <- function(user, reviewed_usr){
to_go <- yelp_raw %>%
filter(stars_rv >= 4) %>%
filter(cluster_usr == user$cluster_usr) %>%
#filter(cluster_usr == user$cluster) %>%
select(business_id) %>%
distinct()
n_go <- nrow(to_go)
#filtra todos os estabelecimentos do cluster do usuário e junta as informações para modelagem
to_review <- user %>%
bind_rows(replicate(n_go-1, user, simplify = FALSE)) %>% #replica as informações do usuário
bind_cols(to_go) %>% #junta os estabelecimentos e notas dadas
left_join(., yelp_bz_raw, by = 'business_id')
#prepara a base para o modelo
user_x_test <- to_review %>%
select_if(is.numeric) %>%
#select(-stars_rv) %>%
scale(center = mean, scale = std) %>%
as.matrix()
#aplica a base no modelo
predictions <- as_tibble(predict(yelp_nn, user_x_test))
#seleciona as principais recomendações
recommendation <- to_review %>%
bind_cols(pred = predictions) %>%
anti_join(., reviewed_usr, by = 'business_id') %>%
filter(V1 > 0.8)
}
Para validar as recomendações, é feito o teste também com um usuário aleatório da base de teste.
n <- ceiling(runif(1,1,nrow(yelp_test)))
(random_user <- yelp_raw[n,]$user_id)
## [1] "UuH7pyPsm4E5bDfXaQJ9dg"
user <- yelp_usr_cluster %>%
filter(user_id == random_user)
reviewed_usr <- yelp_raw %>%
filter(user_id == random_user)
recommendation <- recomm_f(user,reviewed_usr)
## Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
#top 5 recomendações
recommendation %>%
#group_by(categories) %>%
top_n(5, V1) %>%
arrange(-V1) %>%
#ungroup() %>%
#mutate(name = reorder_within(name, V1)) %>%
ggplot(aes(x = V1, y = name)) +
geom_col() +
#facet_wrap(~categories, scales = "free_y") +
#scale_y_reordered() +
labs(x = "Probabilidade de avaliação positiva",
y = 'Recomendação')
#categorias
recommendation %>%
unnest_tokens(category, categories) %>%
filter(category %in% c('food', 'restaurants','bar','club','pub','pizza','pasta','italian')) %>%
group_by(category) %>%
mutate(pred_avg = mean(V1)) %>%
ungroup() %>%
#arrange(-V1) %>%
unique() %>%
top_n(n = 5, wt = V1) %>%
mutate(name = reorder_within(name, -V1, category)) %>%
ggplot(aes(V1, name, fill = category)) +
geom_col(show.legend = TRUE) +
facet_wrap(~category, scales = 'free') +
scale_x_continuous() +
scale_y_reordered() +
labs(x = 'Probabilidade de boa avaliação')